In [97]:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.plotly as ply

In [75]:
import matplotlib.pyplot as plt
import matplotlib
# Store the palette returned by seaborn's color_palette() (called with no arguments).
# NOTE(review): `color` is not referenced by any of the cells visible here.
color = sns.color_palette()

In [11]:
# Relative locations of the two CSV releases of the dataset
# (original file and the "Ver 2" file).
data_path, data_path2 = (
    'dataset/us-mass-shootings-last-50-years/Mass Shootings Dataset.csv',
    'dataset/us-mass-shootings-last-50-years/Mass Shootings Dataset Ver 2.csv',
)

In [51]:
# Read both CSV files with the same options: ISO-8859-1 decoding and the
# "Date" column parsed into datetimes.
read_options = {"encoding": "ISO-8859-1", "parse_dates": ["Date"]}
data1 = pd.read_csv(data_path, **read_options)
data2 = pd.read_csv(data_path2, **read_options)

In [52]:
# Preview the first five rows of the first file.
data1.head(n=5)


Out[52]:
S# Title Location Date Summary Fatalities Injured Total victims Mental Health Issues Race Gender Latitude Longitude
0 1 Las Vegas Strip mass shooting Las Vegas, NV 2017-10-01 NaN 58 515 573 Unclear NaN NaN NaN NaN
1 2 San Francisco UPS shooting San Francisco, CA 2017-06-14 Jimmy Lam, 38, fatally shot three coworkers an... 3 2 5 Yes Asian M NaN NaN
2 3 Pennsylvania supermarket shooting Tunkhannock, PA 2017-06-07 Randy Stair, a 24-year-old worker at Weis groc... 3 0 3 Unclear White M NaN NaN
3 4 Florida awning manufacturer shooting Orlando, Florida 2017-06-05 John Robert Neumann, Jr., 45, a former employe... 5 0 5 Unclear NaN M NaN NaN
4 5 Rural Ohio nursing home shooting Kirkersville, Ohio 2017-05-12 Thomas Hartless, 43, shot and killed a former ... 3 0 3 Yes White M NaN NaN

In [53]:
# Preview the first five rows of the "Ver 2" file.
data2.head(n=5)


Out[53]:
S# Title Location Date Summary Fatalities Injured Total victims Mental Health Issues Race Gender Latitude Longitude
0 1 Las Vegas Strip mass shooting Las Vegas, NV 2017-10-01 NaN 58 527 585 Unclear White M 36.181271 -115.134132
1 2 San Francisco UPS shooting San Francisco, CA 2017-06-14 Jimmy Lam, 38, fatally shot three coworkers an... 3 2 5 Yes Asian M NaN NaN
2 3 Pennsylvania supermarket shooting Tunkhannock, PA 2017-06-07 Randy Stair, a 24-year-old worker at Weis groc... 3 0 3 Unclear White M NaN NaN
3 4 Florida awning manufacturer shooting Orlando, Florida 2017-06-05 John Robert Neumann, Jr., 45, a former employe... 5 0 5 Unclear NaN M NaN NaN
4 5 Rural Ohio nursing home shooting Kirkersville, Ohio 2017-05-12 Thomas Hartless, 43, shot and killed a former ... 3 0 3 Yes White M NaN NaN

1. How many people were killed and injured per year?

Average number of total victims per incident


In [55]:
# Mean victims per incident: the column total divided by the number of rows.
victims = data2['Total victims']
float(victims.sum()) / len(victims)


Out[55]:
10.1875

Average number of total victims per year (averaged over the distinct years that appear in the data)


In [59]:
# Calendar year of every incident, in row order.
years = [incident_date.year for incident_date in data2['Date']]
# Number of distinct years present in the data; later cells divide by this.
ylen = len(set(years))
# Total victims divided by the number of distinct years.
victims_total = float(data2['Total victims'].sum())
victims_total / ylen


Out[59]:
77.61904761904762

Average number of injured people per year


In [60]:
# Total injured divided by the number of distinct years (ylen).
injured_total = float(data2['Injured'].sum())
injured_total / ylen


Out[60]:
46.95238095238095

In [61]:
# Total number of injured people across all rows, shown as a float.
float(data2.loc[:, 'Injured'].sum())


Out[61]:
1972.0

Average number of fatalities per year


In [44]:
# Total fatalities divided by the number of distinct years (ylen).
fatalities_total = float(data2['Fatalities'].sum())
fatalities_total / ylen


Out[44]:
33.333333333333336

In [ ]:


In [64]:
# Scatter plot of each incident's total victim count against its date.
# The two columns are passed unsorted so that every point keeps its own
# (date, victims) pair; sorting the dates and the counts independently would
# detach each count from the incident it belongs to.
plt.figure(figsize=(10,8))
plt.scatter(data2['Date'].values, data2['Total victims'].values)
plt.xlabel('Years')
plt.ylabel('No of victims')
plt.show()


Attacks per year


In [ ]:


In [95]:
# Number of incidents per calendar year, ordered chronologically.
cnt_ms = data2['Date'].dt.year.value_counts().sort_index()
plt.figure(figsize=(12,10))
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# as positional arguments.
sns.barplot(x=cnt_ms.index, y=cnt_ms.values)
plt.xticks(rotation = 'vertical')
plt.xlabel('Year')
plt.ylabel('Number of attacks')
plt.title('Attacks over years')
plt.show()


Overall number of attacks per month.


In [93]:
# Number of incidents per calendar month (1-12), pooled across all years
# and ordered by month number.
cnt_ms = data2['Date'].dt.month.value_counts().sort_index()
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# as positional arguments.
sns.barplot(x=cnt_ms.index, y=cnt_ms.values)
plt.xticks(rotation = 'vertical')
plt.xlabel('Month')
plt.ylabel('Number of attacks')
# The title names months: this chart groups by month, not by year.
plt.title('Attacks per month')
plt.show()